************************************************************************
/*			Filename: 2021-08-10 Code for conducting full analysis of missing decisions over all time.do */
/*			Purpose: analysis of document to docket completeness for all times */
/*			Author: Jason Rantanen		*/
/*			Date created: 2021-07-07 */
/*			Input files: docket dataset, document dataset */
************************************************************************

//Updated on 2021-08-10

//Make sure to change directory to the directory where the data files are located. 

cd "/Users/jrantanen/Documents/Scholarship/2021 Response to Missing Decisions/Final Data and Code/"

/* The Word tables produced later in this file use the user-written "asdoc" prefix command (usage: asdoc tab ...).
   Install it from SSC only when it is not already on the ado-path, so that re-running this file
   does not require an internet connection once asdoc is present. */
capture which asdoc
if _rc {
	ssc install asdoc
}

**********
/* Step 1: Set up document dataset */
/* Result: one observation per appeal docket number, carrying the highest-priority document for that docket. */
**********

use "appeals 2021-09-10.dta", clear

drop if docType=="Errata" //Errata are not included in the analysis

keep uniqueID CloudLink docType Replaced Replaced_Notes Appeal_Dockets url docDate DataSource 

//Keep a copy of the original semicolon-delimited docket list before it is split apart
gen Appeal_Dockets_Full=Appeal_Dockets

//NOTE: THE FOLLOWING CODE MAY TAKE 2-3 MINUTES TO COMPLETE

//Split the docket list on ";" and reshape so that there is one row per (document, docket number)
split Appeal_Dockets, generate(Short) parse(;) 
reshape long Short, i(uniqueID) j(number) 
//Strip any blanks left around the ";" separator so that docket numbers match exactly in the Step 2 merge
replace Short=trim(Short)
//Drop the empty slots created by the reshape, but keep one row for documents that list no docket at all
drop if Short=="" & number!=1 
drop Appeal_Dockets
rename Short Appeal_Dockets
drop number

//Data is now long with respect to both appeal dockets and documents
tab docType

//encode numbers the categories alphabetically; the recode below reorders them into the priority order given in the label definition
encode docType, gen(docType_n)

//The recode and label below only cover four document types (codes 1-4).  A fifth type would receive code 5 and be mislabelled as "Missing", so flag it.
capture assert docType_n<=4 | missing(docType_n)
if _rc {
	display as error "Warning: docType has more than four categories; review the recode and label definition in Step 1."
}

recode docType_n (4=2) (3=4) (2=3) (.=5)
label define docType 1 Opinion 2 "Rule 36" 3 Order 4 Other 5 Missing, replace
label values docType_n docType
tab docType_n

duplicates report Appeal_Dockets

//Number the documents within each docket in priority order.  uniqueID is included as a tie-breaker because sort does not otherwise guarantee the order of tied observations, which would make the retained document vary from run to run.
bysort Appeal_Dockets (docType_n uniqueID): gen dup=_n
tab dup

//some of appeal dockets have multiple documents associated with them.  We will only be keeping the first iteration, prioritizing Opinion > Rule 36 > Order.  

keep if dup==1

tab docType_n

//data is now unique with respect to appeal docket only, so no need to reshape.  If all the documents are to be retained, deactivate the "keep if dup==1" line above and activate the line below; note that there will be 19 iterations of the document-level variables. 

//reshape wide uniqueID docType docType_n Replaced Replaced_Notes CloudLink Appeal_Dockets_Full url, i(Appeal_Dockets) j(dup)


//At this point, the dataset contains all appeal docket numbers that have an opinion, Rule 36, or order in the Compendium.  

save "appeals filed with terminating documents in Compendium", replace

**********
/* Step 2: Merge document dataset into docket dataset and run analysis */
**********

//Start from the docket-level dataset and attach the one-document-per-docket file saved in Step 1
use "2021-08-06 CAFC Dockets.dta", clear

merge 1:1 Appeal_Dockets using "appeals filed with terminating documents in Compendium"

//Merge results by filing year
tabulate Year_Appeal_Filed _merge

//Source of the matched document by filing year
tabulate Year_Appeal_Filed DataSource, missing

/* The DataSource field indicates whether the document was obtained from the Federal Circuit website or another source.
   Documents associated with the Miscellaneous dockets project were collected directly from PACER.  These were not
   available on the court's website and thus are not considered in this analysis, so they are grouped with dockets
   that have no document at all. */
replace docType="No Document" if DataSource=="PACER" | docType==""

//Document type by filing year: all years, then restricted to filing years 2008 through 2018
tabulate Year_Appeal_Filed docType, missing
tabulate Year_Appeal_Filed docType if Year_Appeal_Filed>2007 & Year_Appeal_Filed<2019, missing

//Export the all-years table to Word (this call starts tables2.doc afresh)
asdoc tab Year_Appeal_Filed docType, missing label font(Times New Roman) fs(12) save(tables2.doc) replace title(Documents available on CAFC website compared to all dockets)

//Note: Records with no date are documents that haven't yet been coded for year.  Drop these from any tables.  

save "final dataset comparision - all years.dta", replace

**********
/* Step 3: analysis of documents in the Compendium from the Federal Circuit's website */
**********

//Reload the document-level dataset (one observation per document)
use "appeals 2021-09-10.dta", clear

//Errata are not included in the analysis
keep if docType!="Errata"

//Calendar year of the document; NOTE(review): assumes docDate is stored as a Stata daily date - confirm
generate year=year(docDate)

//Count of documents by year, limited to those obtained from the court's website; appended to the Word file started in Step 2
asdoc tab year if DataSource=="CAFC Website", missing label font(Times New Roman) fs(12) save(tables2.doc) append title(documents in Compendium by year)
